ANALYTICS
Photo by Yannis H on Unsplash
Some car accidents are caused by the ignorance or disbelief of the fact
that a driver’s eyes and mind can be thousands of kilometres apart…
— Mokokoma Mokhonoana
GDP
url_root <- "archetypes/"
url_file1 <- "gdp.csv"
url1 <- paste0(url_root, url_file1)
df_gdp <- read.csv(url1, header = TRUE, stringsAsFactors = FALSE, encoding = "UTF-8")
df_gdp
Road deaths
url_file2 <- "road-deaths.csv"
url2 <- paste0(url_root, url_file2)
df_rd <- read.csv(url2, header = TRUE, stringsAsFactors = FALSE, encoding = "UTF-8")
df_rd
df_rd_wrangle <- df_rd %>% mutate(road_deaths = as.integer(str_replace(est_road_traffic_death_rate_per_100k_population_both_sexes, "\\[.+?\\]", "")))
df_rd_wrangle <- df_rd_wrangle %>% filter(year == 2017)
df_rd_wrangle <- df_rd_wrangle %>% select(c(country, road_deaths))
df_rd_wrangle$iso3 <- countrycode(df_rd_wrangle$country, origin='country.name', destination='iso3c')
# df_rd_wrangle
df_combined <- merge(df_rd_wrangle, df_gdp, by.x='iso3', by.y='Country_Code')
df_combined$region <- countrycode(df_combined$iso3, origin='iso3c', destination='un.region.name')
df_combined <- df_combined %>% select(c(iso3, country, road_deaths, X2017, region))
df_combined <- df_combined[complete.cases(df_combined), ]
# df_combined
df_final <- df_combined %>% rename(gdp = X2017)
df_final <- df_final %>% rename_with(toupper)
df_final <- df_final %>% filter(ROAD_DEATHS > 0)
df_final
theme_opts <- theme(
text = element_text(family = "inconsolata", size = 16),
plot.title = element_text(color = "black", size = 16, face = "bold"),
plot.subtitle = element_text(color = "black", size = 12),
plot.caption = element_text(color = "#555555", size = 10),
# axis.title.x = element_blank(),
# axis.title.y = element_blank(),
# axis.text.x = element_text(vjust = 12),
panel.border = element_blank(),
panel.background = element_blank(),
# panel.grid.minor = element_blank(),
# panel.grid.major = element_blank(),
legend.position='top'
)
region_palette <- c(
"Africa" = "#F57C00",
"Americas" = "#00ACC1",
"Asia" = "#90A4AE",
"Europe" = "#E53935",
"Oceania" = "#8BC34A"
)
g1 <- ggplot(df_final, aes(x = GDP, y = ROAD_DEATHS)) +
geom_point(aes( fill = REGION, color = REGION), size = 4, shape = 21) +
# geom_text_repel(aes(label = COUNTRY)) +
scale_color_manual(values = region_palette) +
scale_fill_manual(values = region_palette) +
labs( title = "Driving lessons",
subtitle = NULL,
#caption = "Source: World Bank, IHME",
x = "GDP per capita, PPP (constant 2017 international $)",
y = "Est road traffic death rate per 100k population both sexes, 2017") +
theme_bw() +
theme_opts
girafe(ggobj = g1, width_svg = 16, height_svg = 12,
options = list(opts_sizing(rescale = TRUE, width = 0.75))
)
geom_smooth
We plot the relationship between the estimated road traffic death per 100k people and GDP per capita in 2017 using a linear regression model. With the linear model, we observed a negative relation between the two variables. The GDP per capita is inversely proportional to the road traffic death rate. In other words, the higher is the GDP per capita, the lower is the road traffic rate per 100k population.
We now run a linear regression using function lm
# lm required parameters
# lm(formula, data)
simple_fit = lm(ROAD_DEATHS~GDP, data = df_final) #Create the linear regression
summary(simple_fit) #Review the results
##
## Call:
## lm(formula = ROAD_DEATHS ~ GDP, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.633 -5.234 -1.334 4.940 28.711
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.30272006 0.79022310 28.22 <0.0000000000000002 ***
## GDP -0.00029960 0.00002767 -10.83 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.448 on 172 degrees of freedom
## Multiple R-squared: 0.4054, Adjusted R-squared: 0.4019
## F-statistic: 117.2 on 1 and 172 DF, p-value: < 0.00000000000000022
g2 <- g1 +
geom_abline(intercept = simple_fit$coefficients[1],
slope = simple_fit$coefficients[2], color = "#33abdd", size = 2, linetype = 'dashed') +
theme_opts
girafe(ggobj = g2, width_svg = 16, height_svg = 12,
options = list(opts_sizing(rescale = TRUE, width = 0.75))
)
label_list <- c(
"Burundi",
"Liberia",
"Haiti",
"India",
"Dominican Republic",
"Thailand",
"China",
"Saudi Arabia",
"Oman",
"United States of America",
"Sweden",
"Ireland",
"Singapore",
"Qatar"
)
df_labels <- filter(df_final, COUNTRY %in% label_list)
df_labels
v2 <- g2 +
geom_text_repel(data=df_labels, aes(label = COUNTRY), box.padding = 0.75, min.segment.length = 0.0, nudge_y = 0.75, point.padding = 0.5, family = "inconsolata") +
geom_hline(yintercept = mean(df_final$ROAD_DEATHS), linetype = 'dashed', size = 1.5, color = '#E53935')
girafe(ggobj = v2, width_svg = 16, height_svg = 12,
options = list(opts_sizing(rescale = TRUE, width = 0.75)))